Library calls

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.5 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(readr)
library(patchwork)

knitr::opts_chunk$set(
    echo = TRUE,
    warning = FALSE,
    fig.width = 8, 
  fig.height = 6,
  out.width = "90%")

theme_set(theme_minimal() + theme(legend.position = "bottom"))

options(
  ggplot2.continuous.colour = "viridis",
  ggplot2.continuous.fill = "viridis")

scale_colour_discrete = scale_colour_viridis_d
scale_fill_discrete = scale_fill_viridis_d

Rat Sightings Data

Load and clean NYC rat sighting data:

rat_df = read_csv("Rat_Sightings.csv") %>%
  janitor::clean_names() %>%
  mutate(created_date = gsub(" .*","", created_date),
         address_type = str_to_title(address_type),
         city = str_to_title(city),
         borough = str_to_title(borough),
         location_type = recode(location_type, "Other (Explain Below)" = "Other")) %>%
  separate(created_date, into = c("month", "day", "year"), sep = "/") %>%
  mutate(year = as.numeric(year)) %>%
  filter(borough != "Unspecified") %>%
  select(month, day, year, location_type, address_type, city, borough, latitude, longitude)
## Rows: 207580 Columns: 38
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (25): Created Date, Closed Date, Agency, Agency Name, Complaint Type, De...
## dbl  (6): Unique Key, Incident Zip, X Coordinate (State Plane), Y Coordinate...
## lgl  (7): Vehicle Type, Taxi Company Borough, Taxi Pick Up Location, Bridge ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Load NYC asthma emergency visit data for adults:

asthma_df = read_csv("asthma_ER_visits_adults.csv") %>%
  janitor::clean_names() %>%
  rename(
    year = time
  ) %>%
  filter(geo_type == "Borough") %>%
  group_by(year, geography) %>%
  select(year, geography, age_adjusted_rate_per_10_000, estimated_annual_rate_per_10_000, number)
## Rows: 584 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): GeoType, Geography, Age-adjusted rate per 10,000, Estimated annual ...
## dbl (3): Time, GeoID, GeoRank
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Merge datasets by year and borough:

asthma_rat_df <- left_join(rat_df, asthma_df, by = c('year' = 'year', 'borough' = 'geography'))

Map sightings by coordinate: # change colors to show which borough has the highest sightings

rat_map =
  rat_df %>% 
  plot_ly(
    lat = ~latitude, 
    lon = ~longitude, 
    type = "scattermapbox", 
    mode = "markers", 
    alpha = 0.2,
    color = ~borough) %>% 
  layout(
    mapbox = list(
      style = 'carto-positron',
      zoom = 9,
      center = list(lon = -73.9, lat = 40.7)),
    title = "<b> Map of Rat Sightings </b>",
      legend = list(title = list(text = "Borough", size = 9),
                    orientation = "v",
                   font = list(size = 9)))
rat_map

Heat map w/density??

rat_df %>%
  plot_ly(
    type = 'densitymapbox',
    lat = ~latitude,
    lon = ~longitude,
    coloraxis = 'coloraxis',
    radius = 10,
    color = ~borough) %>%
  layout(
    mapbox = list(
      style = "stamen-terrain",
      zoom = 9,
      center = list(lon = -73.9, lat = 40.7)), 
    coloraxis = list(colorscale = "Viridis"))

Plot of sightings over time in NYC overall:

overall_rat_line =
  rat_df %>%
  group_by(year) %>%
  count() %>%
  summarise(n_obs = n) %>% 
  ggplot(aes(x = year, y = n_obs)) + 
  geom_line() +
  labs(
    title = "Rat Sightings Over Time in NYC",
    x = "Year",
    y = "Number of Sightings")

overall_rat_line

Plot of sightings over time in NYC by borough:

rat_line = rat_df %>% 
  group_by(borough, year) %>%  
  count() %>%
  summarise(n_obs = n) %>% 
  ggplot(aes(x = year, y = n_obs , color = borough )) + 
  geom_line() +
  scale_x_continuous(breaks = seq(2010, 2022, by = 1)) +
  labs(
    title = "Rat Sightings Over Time by Borough",
    x = "Year",
    y = "Number of Sightings")
## `summarise()` has grouped output by 'borough'. You can override using the
## `.groups` argument.
rat_line

Bar graph of all sightings by borough:

rat_bar = 
  rat_df %>% 
  count(borough) %>% 
  mutate(borough = fct_reorder(borough, n)) %>% 
  ggplot(aes(x = borough, y = n, fill = borough)) + 
  geom_bar(stat = "identity") +
  labs(
    title = "Frequency of Rat Sightings by Borough (2010-2022)",
    x = "Borough",
    y = "Number of Sightings",
    fill = "Borough")
  
  #plot_ly(x = ~borough, y = ~n, color = ~borough, type = "bar", colors = "viridis")

rat_bar

Plot of frequency of sightings by borough and year:

rat_bar_time =
  rat_df %>% 
  group_by(borough) %>%
  count(year) %>% 
  ggplot(aes(x = year, y = n, fill = borough)) +
  geom_bar(stat = "identity",
           position = "dodge") +
  scale_x_continuous(breaks = seq(2010, 2022, by = 1)) +
  labs(
    title = "Frequency of Sightings in Boroughs",
    x = "Year",
    y = "Number of Sightings",
    fill = "Borough")

rat_bar_time

Location type:

location_bar =
  rat_df %>%
  count(location_type) %>%
   mutate(
    location_type = fct_reorder(location_type, n),
    ranking = min_rank(desc(n))) %>% 
  filter(ranking <= 10) %>% 
  arrange(n) %>%
  ggplot(aes(x = location_type, y = n, fill = location_type)) + 
  geom_bar(stat = "identity") +
  labs(title = "Top 10 Location Types",
       x = "Location Type",
       y = "Number of Sightings",
       fill = "location_type") + coord_flip() +
  theme(legend.position = "none") 

location_bar

location_grid =
  rat_df %>%
  group_by(borough, location_type) %>%
  count() %>%
  summarise(n_obs = n) %>%
   mutate(
    location_type = fct_reorder(location_type, n_obs),
    ranking = min_rank(desc(n_obs))) %>% 
  filter(ranking <= 3) %>% 
  arrange(ranking) %>%
  ggplot(aes(x = location_type, y = n_obs, fill = location_type)) + 
  geom_bar(stat = "identity") +
  facet_grid(. ~ borough) +
  labs(title = "Top Sighting Locations by Borough",
       x = "Location Type",
       y = "Number of Sightings",
       fill = "location_type") +
  theme(axis.text.x = element_blank())
## `summarise()` has grouped output by 'borough'. You can override using the
## `.groups` argument.
  #theme(axis.text.x = element_text(angle = 90, hjust = 1))

location_grid

Rat asthma plot:

asthma_line =
  asthma_rat_df %>%
  filter(year > 2009) %>%
  mutate(age_adjusted_rate_per_10_000 = as.numeric(age_adjusted_rate_per_10_000, na.rm = TRUE)) %>%
  group_by(year) %>%
  summarize(mean_age_adjust = mean(age_adjusted_rate_per_10_000)) %>%
  drop_na(mean_age_adjust) %>%
  ggplot(aes(x = year, y = mean_age_adjust)) + 
  geom_line()

asthma_line + overall_rat_line

ASTHMA PLOTS

Asthma cases in nyc over time

asthma_num_df =
  asthma_df %>%
  mutate(
    number = as.numeric(number), 
    data = "Total asthma ER visits"
  ) %>%
  group_by(year, data) %>%
  summarise(n_obs = sum(number, na.rm = TRUE))
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
asthma_rate_df =
  asthma_df %>%
  mutate(
    age_adjusted_rate_per_10_000 = as.numeric(age_adjusted_rate_per_10_000),
    data = "Age adjusted asthma ER visit rate"
  ) %>%
  group_by(year, data) %>%
  summarise(n_obs = mean(age_adjusted_rate_per_10_000, na.rm = TRUE))
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
asthma_estimated_df =
  asthma_df %>%
  mutate(
    estimated_annual_rate_per_10_000 = as.numeric(estimated_annual_rate_per_10_000), 
    data = "Estimated annual asthma rate"
  ) %>%
  group_by(year, data) %>%
  summarise(n_obs = mean(estimated_annual_rate_per_10_000, na.rm = TRUE))
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
asthma_years_num_plot =
  asthma_num_df %>%
  ggplot(aes(x = year, y = n_obs)) + 
  geom_line() +
  scale_x_continuous(breaks = seq(2005, 2018, 1)) +
  labs(title = "Total number of asthma ER visits per year",
       x = "Year",
       y = "Total number of asthma ER visits")
  

asthma_rate_estimated_plot = 
  ggplot(data = asthma_rate_df, aes(x = year, y = n_obs, color = data)) + 
  geom_line() +
  geom_line(data = asthma_estimated_df) +
  scale_x_continuous(breaks = seq(2005, 2018, 1)) +
  labs(title = "Age adjusted vs. Estimated Asthma Rates Over The Years",
       x = "Year",
       y = "Rate per 10,000 people")
  

asthma_years_num_plot

asthma_rate_estimated_plot

Mean asthma rate and estimated asthma rate per borough

asthma_rate_borough_plot = 
  asthma_df %>% 
  group_by(geography) %>% 
  mutate(
    age_adjusted_rate_per_10_000 = as.numeric(age_adjusted_rate_per_10_000),
    geography = as.factor(geography)
  ) %>%
  summarise(
    asthma_rate = mean(age_adjusted_rate_per_10_000, na.rm = TRUE)
  ) %>%
  mutate(geography = fct_reorder(geography, asthma_rate)) %>% 
  ggplot(aes(x = geography, y = asthma_rate, fill = geography)) + 
  geom_bar(stat = "identity") +
  labs(
    title = "Mean asthma rate per borough (2004-2018)",
    x = "Borough",
    y = "Mean Age-Adjusted Asthma Rate per 10,000",
    fill = "Borough")

asthma_rate_borough_plot

Asthma rate per borough over time

asthma_borough_time_plot = 
  asthma_df %>% 
  group_by(geography, year) %>% 
  mutate(
    age_adjusted_rate_per_10_000 = as.numeric(age_adjusted_rate_per_10_000),
    geography = as.factor(geography)
  ) %>%
  summarise(
    asthma_rate = mean(age_adjusted_rate_per_10_000, na.rm = TRUE)
  ) %>%
  ggplot(aes(x = year, y = asthma_rate, fill = geography)) +
  geom_bar(stat = "identity",
           position = "dodge") +
  scale_x_continuous(breaks = seq(2005, 2018, by = 1)) +
  labs(
    title = "Mean Asthma ER Visits in Boroughs Over Time",
    x = "Year",
    y = "Mean Asthma ER Visits per 10,000",
    fill = "Borough")
## `summarise()` has grouped output by 'geography'. You can override using the
## `.groups` argument.
asthma_borough_time_plot

add red line to indicate missing data in 2015 and 2017